# Keep things nice and tidy, all libraries go here
library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(svglite)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(ggsci)
# for color palettes
library(paletti)
library(pals)
# Read the first sheet of the workbook (skipping its first line) and keep
# only the rows whose `Exclude` cell is empty.
data <- read_excel("data/data_post_IEEE_fix.xlsx", sheet = 1, skip = 1) %>%
  filter(is.na(Exclude))
## New names:
## * `` -> ...35
## * `` -> ...68
# Years without any publication, stored as strings (for easy slicing)
years_no_publications <- as.character(c(1974:1976, 1978))
# LABELS so slicing will not become a mess
# Column names of the SWEBOK knowledge areas, in the order used for selections.
swebok_areas_labels <- c(
  "SR", "SD", "SC", "ST", "SM",
  "SCM", "SEM", "SEP", "SEMM", "SQ",
  "SEPP", "SEE", "CF", "MF", "EF"
)
# Same labels without the last three (CF, MF, EF).
swebok_areas_labels_no_foundation <- c(
  "SR", "SD", "SC", "ST", "SM",
  "SCM", "SEM", "SEP", "SEMM", "SQ",
  "SEPP", "SEE"
)
# Display names used as axis labels; positionally aligned with
# swebok_areas_labels_no_foundation.
swebok_areas_labels_long <- c(
  "Requirements",
  "Design",
  "Construction",
  "Testing",
  "Maintenance", # was misspelled "Maintainance" in the plot label
  "Config. Mgmt.",
  "SE Mgmt.",
  "SE Processes",
  "SE Models&Methods",
  "Software Quality",
  "SE Prof. Practice",
  "SE Economics"
)
# Column names of the cognitive-concept taxonomy, one concept family per row.
cognitive_concepts_labels <- c(
  "Attention", "Selective attention", "Divided attention", "Sustained attention",
  "Memory", "Working memory", "Short-term memory", "Long-term memory",
  "Cognitive load",
  # "Cognitive control",
  "Intrinsic CL", "Extrinsic CL",
  "Perception",
  "Problem solving", "Reasoning", "Decision making",
  "Cognitive biases",
  "Knowledge", "Explicit knowledge", "Tacit knowledge",
  "Techn. tacit knowl.", "Cogn. tacit knowl."
)
# Column names of the assessment procedures: the qualitative group first,
# then the quantitative group.
measures_labels <- c(
  "Qualit. measures", "Fieldwork", "Interview", "Task-based", "Open observation",
  "Quantit. measures", "Task performance", "Physiological meas.",
  "Subjective ratings", "Behavioral meas."
)
# COLORS
# Nine-colour qualitative palette.
tol9qualitative <- c(
  "#332288", "#88CCEE", "#44AA99",
  "#117733", "#999933", "#DDCC77",
  "#CC6677", "#882255", "#AA4499"
)
# Nine-colour palette used with scale_fill_manual() further down.
NPG_modified <- c(
  "#F5E144", "#4DBBD5FF", "#00A087FF",
  "#3C5488FF", "#F39B7FFF", "#8491B4FF",
  "#91D1C2FF", "#DC0000FF", "#7E6148FF"
)
# Twenty-five colours, mixing hex codes and R colour names.
col25 <- c(
  "dodgerblue2", "#E31A1C", "green4", "#6A3D9A", "#FF7F00",
  "black", "gold1", "skyblue2", "#FB9A99", "palegreen2",
  "#CAB2D6", "#FDBF6F", "gray70", "khaki2", "maroon",
  "orchid1", "deeppink1", "blue1", "steelblue4", "darkturquoise",
  "green1", "yellow4", "yellow3", "darkorange4", "brown"
)
# Necessary for grouping by high-level category.
#
# Adds (or overwrites) a `Concept` column holding the high-level concept that
# each row's `Taxonomy` value belongs to. Values that are not part of the
# mapping, including NA, get NA.
#
# data: a data frame with a `Taxonomy` column.
# Returns `data` with the extra character column `Concept`.
add_high_level_concepts_to_data <- function(data) {
  concept_members <- list(
    "Attention" = c("Attention", "Selective attention", "Divided attention", "Sustained attention"),
    "Memory" = c("Memory", "Working memory", "Short-term memory", "Long-term memory"),
    "Cognitive load" = c("Cognitive control", "Cognitive load", "Extrinsic CL", "Intrinsic CL"),
    "Perception" = "Perception",
    "Reasoning" = c("Problem solving", "Reasoning", "Decision making"),
    "Cognitive biases" = "Cognitive biases",
    "Knowledge" = c("Knowledge", "Explicit knowledge", "Tacit knowledge", "Techn. tacit knowl.", "Cogn. tacit knowl.")
  )
  # Flatten into a named vector: taxonomy label -> high-level concept.
  concept_of <- setNames(
    rep(names(concept_members), lengths(concept_members)),
    unlist(concept_members, use.names = FALSE)
  )
  data[["Concept"]] <- unname(concept_of[as.character(data[["Taxonomy"]])])
  data
}
# Bar chart of the number of publications per year, with each bar's count
# printed inside it, saved to PDFs/yearly_distribution.pdf.
ggplot(data, aes(x = as.factor(Year))) +
  geom_bar() +
  ylab("Number of publications") +
  xlab("Year") +
  # after_stat(count) replaces the deprecated `..count..` notation
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = 2, color = "white", size = 2.5) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave("PDFs/yearly_distribution.pdf", dpi = "screen")
## Saving 7 x 5 in image
# Cleaning not needed values: "?" placeholders become NA
data <- data %>%
  mutate(Academia = replace(Academia, Academia == "?", NA)) %>%
  mutate(Industry = replace(Industry, Industry == "?", NA))
# Classify each publication from its Academia / Industry cells.
# NOTE(review): the final TRUE branch labels every remaining combination
# "Both", not only Academia == "1" & Industry == "1" - confirm this is intended.
data <- data %>%
  mutate(Type = case_when(
    is.na(Academia) & is.na(Industry) ~ "None",
    Academia == "1" & is.na(Industry) ~ "Academia",
    Industry == "1" & is.na(Academia) ~ "Industry",
    TRUE ~ "Both"
  ))
# Bar chart of publications per type, most frequent type first.
data %>%
  mutate(Type = fct_infreq(Type, ordered = TRUE)) %>% # TRUE, not the reassignable T
  ggplot(aes(x = Type)) +
  geom_bar(width = .5) +
  xlab("Type of publication") +
  ylab("Number of publications") +
  # after_stat(count) replaces the deprecated `..count..` notation
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = 3, color = "white", size = 4) +
  theme_bw()
ggsave("PDFs/academia_industry_distribution.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
A publication can be in more than one category at the same time.
# Total number of publications per SWEBoK area, drawn as bars sorted from the
# most to the least frequent area, saved to PDFs/swebok_distribution.pdf.
data %>%
  select(all_of(swebok_areas_labels)) %>% # selecting columns corresponding to the SWEBoK Areas
  mutate_all(replace_na, 0) %>%
  mutate(SEPP = as.numeric(SEPP)) %>%
  summarise_all(sum) %>%
  gather(key = "SWEBOKArea", value = "publications", everything()) %>%
  arrange(desc(publications)) %>%
  mutate(SWEBOKArea = fct_inorder(SWEBOKArea)) %>% # keep the sorted order on the axis
  ggplot(aes(x = SWEBOKArea, y = publications)) +
  geom_col() +
  geom_text(aes(label = publications), vjust = -0.3, color = "black", size = 4) +
  labs(x = "SWEBoK Area", y = "Number of publications") +
  theme_bw()
ggsave("PDFs/swebok_distribution.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Cross-product of the SWEBoK area columns (missing cells counted as 0).
# Assuming the columns are 0/1 flags, cell [i, j] is the number of
# publications tagged with both area i and area j.
swebokareas <- crossprod(as.matrix(
  data %>%
    select(all_of(swebok_areas_labels)) %>% # selecting columns corresponding to the SWEBoK Areas
    mutate_all(replace_na, 0)
))
kable(swebokareas)
| SR | SD | SC | ST | SM | SCM | SEM | SEP | SEMM | SQ | SEPP | SEE | CF | MF | EF | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SR | 66 | 19 | 5 | 2 | 4 | 0 | 9 | 2 | 5 | 0 | 7 | 0 | 0 | 0 | 1 |
| SD | 19 | 84 | 19 | 5 | 4 | 0 | 7 | 2 | 6 | 1 | 7 | 0 | 0 | 0 | 1 |
| SC | 5 | 19 | 98 | 5 | 25 | 2 | 3 | 2 | 2 | 0 | 4 | 0 | 0 | 0 | 0 |
| ST | 2 | 5 | 5 | 21 | 5 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| SM | 4 | 4 | 25 | 5 | 58 | 2 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| SCM | 0 | 0 | 2 | 0 | 2 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| SEM | 9 | 7 | 3 | 1 | 2 | 0 | 41 | 3 | 1 | 1 | 9 | 3 | 0 | 0 | 1 |
| SEP | 2 | 2 | 2 | 0 | 1 | 1 | 3 | 12 | 0 | 0 | 2 | 1 | 0 | 0 | 0 |
| SEMM | 5 | 6 | 2 | 0 | 0 | 0 | 1 | 0 | 15 | 0 | 1 | 0 | 0 | 0 | 0 |
| SQ | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 10 | 0 | 0 | 0 | 0 | 0 |
| SEPP | 7 | 7 | 4 | 0 | 1 | 0 | 9 | 2 | 1 | 0 | 27 | 3 | 0 | 0 | 1 |
| SEE | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | 0 | 0 | 3 | 5 | 0 | 0 | 0 |
| CF | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| MF | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| EF | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
# Interactive heatmap of the `swebokareas` matrix, with the SWEBoK area labels on both axes
plot_ly(x=swebok_areas_labels, y=swebok_areas_labels, z=swebokareas, type="heatmap")
# Long-format table for the bubble plot: one row per (publication, SWEBOK area
# with pubs > 0, taxonomy column).
# The empty merge conflict that preceded this block is removed, and the label
# vectors are wrapped in all_of() so the selections are unambiguous.
x <- data %>%
  select(all_of(swebok_areas_labels), all_of(cognitive_concepts_labels)) %>%
  mutate_all(replace_na, 0) %>%
  mutate(`Problem solving` = as.numeric(`Problem solving`)) %>%
  gather(key = "SWEBOK", value = pubs, all_of(swebok_areas_labels)) %>% # use SWEBOK area as factor
  filter(pubs > 0) %>% # select areas for which there are publications
  group_by(SWEBOK, .add = TRUE) %>%
  # xtally() %>% # number of publication for each area
  # select(-pubs) %>% # remove pubs to reuse it later
  gather(key = "Taxonomy", value = "count", all_of(cognitive_concepts_labels)) %>% # count publications in each cognitive taxonomy area
  mutate(label = str_replace(as.character(count), "^0", "")) # add label for later; a leading 0 is blanked out
# Bubble plot
# Order the taxonomy levels alphabetically so the y axis is stable.
x <- arrange(x, Taxonomy)
xf <- x$Taxonomy
xfu <- unique(xf)
x$Taxonomy <- factor(xf, levels = xfu)
p <- ggplot(x)
# theme_bw() is a complete theme: it must come BEFORE theme(), otherwise it
# replaces the axis-text and grid customisations (as it did when added last).
p + geom_point(aes(x = fct_infreq(SWEBOK), y = fct_rev(Taxonomy), size = count), shape = 21, fill = "white", alpha = 0.60) +
  geom_text(aes(x = fct_infreq(SWEBOK), y = fct_rev(Taxonomy), label = label), size = 2) +
  labs(x = "SWEBOK Area", y = "Taxonomy Area") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1.1, size = 9, colour = "black"),
    axis.text.y = element_text(size = 8, colour = "black"),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10, colour = "black", vjust = 0.12),
    panel.grid.major = element_line(linetype = "dashed", size = 0.1, color = "black")
  )
## Warning: Using size for a discrete variable is not advised.
ggsave("PDFs/swebok_taxonomy_bubble.pdf", dpi = "screen")
## Saving 7 x 5 in image
## Warning: Using size for a discrete variable is not advised.
# Preparing the dataset for analysing the research methods.
# Quantitative / Qualitative are 1 when any of the corresponding measure
# columns equals 1 and 0 otherwise (also 0 when the test evaluates to NA);
# Both compares the two new flags.
data <- data %>%
  mutate(
    Quantitative = if_else(
      `Quantit. measures` == 1 | `Task performance` == 1 | `Physiological meas.` == 1 |
        `Subjective ratings` == 1 | `Behavioral meas.` == 1,
      1, 0,
      missing = 0
    ),
    Qualitative = if_else(
      Fieldwork == 1 | Interview == 1 | `Qualit. measures` == 1 |
        `Task-based` == 1 | `Open observation` == 1,
      1, 0,
      missing = 0
    ),
    Both = if_else(Qualitative == 1 & Quantitative == 1, 1, 0)
  )
Number of publications per year according to SWEBOK areas
# Fill scale for the yearly SWEBOK plot, built from col25.
# palette(col25) returns the palette that was in effect BEFORE the call (and
# changes the global graphics palette), so the scale never used col25; the
# vector is now passed directly.
cols_fill_years_swebok <- get_scale_fill(get_pal(col25))
# Stacked bars: publications per year, split by SWEBOK area.
# The merge conflict inside this pipeline is resolved (both sides held the
# same code, only formatted differently).
data %>%
  filter(is.na(Exclude)) %>%
  select(c(Year, SR:EF)) %>%
  gather("SWEBOK", "publications", 2:16) %>%
  mutate_all(replace_na, 0) %>%
  mutate(publications = as.numeric(publications)) %>%
  group_by(Year, SWEBOK) %>%
  summarise(total = sum(publications)) %>%
  ggplot(aes(x = as.factor(Year), fill = SWEBOK, y = total)) +
  geom_bar(stat = "sum") +
  xlab("Year") + ylab("Publications") +
  # scale_fill_discrete(name = ...) was silently replaced by the custom fill
  # scale below, losing the legend title; labs() keeps it.
  labs(fill = "SWEBOK Areas") +
  guides(size = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6)) +
  cols_fill_years_swebok()
## `summarise()` regrouping output by 'Year' (override with `.groups` argument)
ggsave("PDFs/years_swebok.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Add a row for every year in 1973..2016 that has no publication.
data <- data %>% complete(Year=seq(1973,2016))
# Label each publication with its research method and drop rows without one.
# NOTE(review): rows added by complete() presumably have NA in Both and
# Qualitative, which gives research_method = NA, so the filter below appears
# to remove them again - confirm whether the empty years should survive.
# NOTE(review): a row with Both == 0 and Qualitative == 0 falls through to
# "Quantitative" even when Quantitative == 0 - confirm this is intended.
data <- data %>%
mutate(research_method = if_else(Both==1, "Mixed", if_else(Qualitative==1, "Qualitative", "Quantitative"))) %>%
filter(!is.na(research_method))
# Stacked bars: publications per year, split by research method.
# NOTE(review): four labels are supplied to the fill scale (the last one
# empty) - verify against the number of research_method levels.
data %>% ggplot(aes(x=as.factor(Year), fill=research_method)) + geom_bar() +
scale_fill_discrete(name="Research method", labels = c("Mixed", "Qualitative", "Quantitative", "")) +
xlab("Year") + ylab("Publications") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 5))
ggsave("PDFs/years_researchmethods.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Publications per SWEBOK area and research method, in long format.
# The label vector is wrapped in all_of() so the column selections are
# unambiguous (a bare external vector triggers a tidyselect note).
data.swebok.researchmethod <- data %>%
  select(all_of(swebok_areas_labels), research_method) %>%
  mutate_all(replace_na, 0) %>%
  filter(research_method != 0) %>% # drop rows whose research_method was NA before replace_na
  mutate(SEPP = as.numeric(SEPP)) %>% # quickfix. For some reason SEPP column is not cast to numeric
  group_by(research_method) %>%
  summarise_at(vars(all_of(swebok_areas_labels)), sum) %>%
  gather("SWEBOK", "Publications", all_of(swebok_areas_labels))
# Horizontal stacked bars, areas ordered by their total number of publications.
data.swebok.researchmethod %>%
  ggplot(aes(x = reorder(SWEBOK, Publications, function(x) {sum(x)}), y = Publications, fill = research_method)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("SWEBOK areas") +
  scale_fill_discrete(name = "Research method")
ggsave("PDFs/SWEBOK_researchmethods.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Bubble plot: number of (publication, cognitive concept, assessment procedure)
# combinations, with the qualitative procedures left of the divider and the
# quantitative ones right of it.
data %>%
  filter(!is.na(Identifier)) %>%
  select(Identifier, all_of(cognitive_concepts_labels), all_of(measures_labels)) %>%
  gather(Taxonomy, value, all_of(cognitive_concepts_labels)) %>%
  filter(!is.na(value)) %>%
  select(-value) %>%
  gather(Method, value, all_of(measures_labels)) %>%
  filter(!is.na(value)) %>%
  arrange(Identifier) %>%
  select(-Identifier, -value) %>%
  group_by(Taxonomy, Method) %>%
  tally(name = "Amount") %>%
  filter(Method != "Qualit. measures" & Method != "Quantit. measures") %>%
  # Relevel only with the procedures that survive the filter above; asking
  # for the two removed umbrella levels raised "Unknown levels" warnings.
  ggplot(aes(
    x = fct_relevel(Method, setdiff(measures_labels, c("Qualit. measures", "Quantit. measures"))),
    y = Taxonomy,
    size = Amount
  )) +
  # A constant alpha belongs outside aes(); inside aes() it is mapped through
  # an alpha scale instead of being applied as 0.8.
  geom_point(alpha = 0.8) +
  scale_size_continuous(range = c(3, 12)) +
  xlab("Cognitive Assessment Procedures") +
  ylab("Cognitive Concepts") +
  geom_vline(xintercept = 4.5, size = 0.5, color = "darkgrey") +
  annotate(geom = "text", x = 6.5, y = 0.7, label = "Quantitative", size = 3, alpha = 0.9) +
  annotate(geom = "text", x = 2.5, y = 0.7, label = "Qualitative", size = 3, alpha = 0.9) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 30, hjust = 1, size = 8))
ggsave("PDFs/taxonomy_methods.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Heatmap of co-occurrences between SWEBOK areas (x) and high-level cognitive
# concepts (y). The label vectors are wrapped in all_of() so the gather()
# selections are unambiguous (a bare external vector triggers a tidyselect note).
data %>%
  select(all_of(swebok_areas_labels_no_foundation), all_of(cognitive_concepts_labels)) %>%
  mutate_all(replace_na, 0) %>%
  gather(Taxonomy, value2, all_of(cognitive_concepts_labels)) %>%
  add_high_level_concepts_to_data() %>%
  gather(SWEBOK, value, all_of(swebok_areas_labels_no_foundation)) %>%
  count(SWEBOK, Concept, value, value2) %>%
  # only rows where both the area and the concept are flagged contribute
  mutate(freq = ifelse(value == 1 & value2 == 1, n, 0)) %>%
  distinct(SWEBOK, Concept, freq) %>%
  group_by(SWEBOK, Concept) %>%
  summarize(total = sum(freq)) %>%
  ungroup() %>%
  ggplot(aes(fct_relevel(SWEBOK, swebok_areas_labels_no_foundation), fct_rev(Concept), fill = total)) +
  geom_tile() +
  scale_fill_continuous(low = "#fff9f7", high = "red") +
  xlab("SWEBOK areas") + ylab("Cognitive Concepts") +
  guides(fill = guide_legend(title = "")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 8)) +
  # long names are matched by position with swebok_areas_labels_no_foundation
  scale_x_discrete(labels = swebok_areas_labels_long)
## `summarise()` regrouping output by 'SWEBOK' (override with `.groups` argument)
ggsave("PDFs/taxomony_swebok_cooccurences.pdf", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Heatmap of co-occurrences between assessment procedures (x) and high-level
# cognitive concepts (y).
data %>%
  select(all_of(cognitive_concepts_labels), all_of(measures_labels)) %>%
  mutate_all(replace_na, 0) %>%
  gather(Taxonomy, value, all_of(cognitive_concepts_labels)) %>%
  add_high_level_concepts_to_data() %>%
  gather(Method, value2, all_of(measures_labels)) %>%
  count(Concept, Method, value, value2) %>%
  # only rows where both the concept and the procedure are flagged contribute
  mutate(freq = ifelse(value == 1 & value2 == 1, n, 0)) %>%
  mutate(freq = as.integer(freq)) %>%
  mutate(Method = fct_relevel(Method, measures_labels)) %>%
  filter(Method != "Qualit. measures" & Method != "Quantit. measures") %>%
  # Merge conflict resolved in favour of HEAD: without this filter the
  # zero-frequency rows draw tiles at the same (Method, Concept) positions.
  filter(freq > 0) %>%
  ggplot(aes(Method, Concept, fill = freq)) +
  geom_tile() +
  geom_vline(xintercept = 4.5, size = 0.5, color = "darkgrey") +
  xlab("Cognitive Assessment Procedures") + ylab("Cognitive Concepts") +
  guides(fill = guide_legend(title = "")) +
  # scale_x_discrete(labels=c("Fieldwork", "Interview", "Task-based", "Open observation", "Others", "Task performance", "Physiological meas.", "Subjective ratings", "Behavioral meas.", "Others")) + # not using measure_lables here since we need a catch-all "Others" category
  scale_fill_continuous(low = "#fff9f7", high = "darkgreen") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 8)) +
  annotate(geom = "text", x = 6.5, y = 0.7, label = "Quantitative", size = 3, alpha = 0.6) +
  annotate(geom = "text", x = 2.5, y = 0.7, label = "Qualitative", size = 3, alpha = 0.6)
# The plot chain previously ended with a dangling `+`, which tried to add the
# ggsave() call to the plot; the save is now a separate statement.
ggsave("PDFs/taxonomy_method_cooccurences.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Stacked bars: publications per year, split by taxonomy column.
# The stray merge-conflict marker that preceded this block is removed and the
# label vector is wrapped in all_of() so the selections are unambiguous.
data %>%
  select(Year, all_of(cognitive_concepts_labels)) %>%
  filter(Year > 0) %>%
  gather("Taxonomy", "publications", all_of(cognitive_concepts_labels)) %>%
  mutate_all(replace_na, 0) %>%
  mutate(publications = as.integer(publications)) %>%
  group_by(Year, Taxonomy) %>%
  summarise(total = sum(publications)) %>%
  ggplot(aes(as.factor(Year), total, fill = Taxonomy)) +
  geom_bar(stat = "sum") +
  xlab("Year") + ylab("Publications") +
  scale_fill_discrete(name = "Taxonomy Areas") +
  guides(size = FALSE) + # FALSE, not the reassignable F
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6))
## `summarise()` regrouping output by 'Year' (override with `.groups` argument)
# Fill scale built from the pals::stepped() colours.
# (The stray merge-conflict marker that preceded this block is removed.)
cols_fill_taxonomy_years <- get_scale_fill(get_pal(pals::stepped()))
# One row per (Year, taxonomy column) with at least one publication.
df.taxonomy <- data %>%
  select(Year, all_of(cognitive_concepts_labels)) %>%
  gather("Taxonomy", "publications", all_of(cognitive_concepts_labels)) %>%
  mutate_all(replace_na, 0) %>%
  mutate(publications = as.integer(publications)) %>% # for some reason recognized as char
  filter(publications > 0)
# need to create a separated df to hold the percentage of publications within each year
data.percentage <- df.taxonomy %>%
  group_by(Year) %>%
  count(Taxonomy) %>%
  mutate(ratio = scales::percent(n / sum(n)))
# 100 % stacked bars per year, each segment labelled with its share of the year.
df.taxonomy %>%
  ggplot(aes(x = as.factor(Year), fill = as.factor(Taxonomy))) +
  geom_bar(position = "fill") +
  geom_text(data = data.percentage, aes(y = n, label = ratio), position = position_fill(vjust = 0.5), colour = "black", size = 1.3, alpha = 0.5) +
  xlab("Year") + ylab("Publications %") +
  guides(size = FALSE, fill = guide_legend(title = "Concepts (including sub-concepts)")) +
  scale_y_continuous(labels = percent) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
  theme(legend.key.size = unit(.2, "cm"), legend.key.width = unit(0.2, "cm"), legend.title = element_text(size = 8), legend.text = element_text(size = 6)) +
  cols_fill_taxonomy_years()
ggsave("PDFs/taxonomy_years.pdf", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Taxonomy rows tagged with their high-level concept.
df.concepts <- add_high_level_concepts_to_data(df.taxonomy)
# Publications per year, leaving out the years listed in years_no_publications.
df.years <- data %>%
  filter(!(Year %in% years_no_publications)) %>%
  count(Year)
# 100 % stacked bars of concepts per year (left axis) overlaid with the total
# number of publications per year as a line with points (right axis).
ggplot() +
  geom_bar(
    data = df.concepts,
    aes(x = as.factor(Year), fill = Concept),
    position = "fill"
  ) +
  geom_line(
    data = df.years,
    aes(x = as.factor(Year), y = n / max(n), group = 1),
    size = 0.8
  ) +
  geom_point(
    data = df.years,
    aes(x = as.factor(Year), y = n / max(n), group = 1)
  ) +
  scale_y_continuous(
    name = "Publication %",
    labels = function(x) x * 100,
    # the totals were divided by their maximum, so the secondary axis multiplies back
    sec.axis = sec_axis(
      ~ . * max(df.years$n),
      name = "Total publications",
      breaks = scales::breaks_extended(10)
    )
  ) +
  labs(x = "Year") +
  theme(
    panel.background = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_text(margin = margin(-15, 0, 0, 0, "pt")),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8, vjust = 2.4)
  ) +
  scale_fill_manual(values = NPG_modified)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
ggsave("PDFs/cc_cm_classification_wide_colored.pdf", width = unit(10, "inch"), height = unit(6.5, "inch"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
# Horizontal bar chart of publications per outlet type, least frequent first.
data %>%
  filter(!is.na(publication_type)) %>%
  ggplot(aes(x = fct_rev(fct_infreq(publication_type)))) +
  geom_bar(stat = "count", width = .4) +
  # Labels are matched by outlet code rather than by position: positional
  # labels depended on the frequency order and swapped "Conferences" and
  # "Journals" for the current counts (C = 150, J = 133).
  # NOTE(review): assumes the codes mean B = book, C = conference,
  # J = journal, M = magazine, W = workshop - confirm against the spreadsheet.
  scale_x_discrete(
    "Publication outlets",
    labels = c(M = "Magazines", B = "Books", W = "Workshops", C = "Conferences", J = "Journals")
  ) +
  scale_y_continuous("Number of publications", breaks = scales::breaks_extended(10)) +
  coord_flip() +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 90, hjust = .5, vjust = -0.3))
ggsave("PDFs/outlets.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Horizontal stacked bars: publications per high-level cognitive concept,
# split by outlet type.
data %>%
  filter(!is.na(publication_type)) %>%
  select(all_of(cognitive_concepts_labels), publication_type) %>%
  mutate_all(replace_na, 0) %>%
  gather(Taxonomy, value2, all_of(cognitive_concepts_labels)) %>%
  add_high_level_concepts_to_data() %>%
  mutate(value2 = as.integer(value2)) %>%
  filter(value2 > 0) %>%
  ggplot(aes(x = fct_rev(fct_infreq(Concept)))) +
  geom_bar(stat = "count", aes(fill = publication_type)) +
  xlab("Cognitive Concepts") +
  scale_y_continuous("Number of publications", breaks = scales::breaks_extended(10)) +
  coord_flip() +
  # Labels are matched by outlet code rather than by position: the fill levels
  # sort alphabetically (B, C, J, M, W), so the positional labels swapped
  # "Workshops" and "Magazines".
  # NOTE(review): assumes the codes mean B = book, C = conference,
  # J = journal, M = magazine, W = workshop - confirm against the spreadsheet.
  scale_fill_discrete(
    name = "Publication outlets",
    labels = c(B = "Books", C = "Conferences", J = "Journals", M = "Magazines", W = "Workshops")
  ) +
  theme_minimal()
ggsave("PDFs/outlets_concepts.pdf", dpi = "screen", width = unit(10, "inch"), height = unit(6.5, "inch"))
# Table with the number of publications per outlet code (top 5 by count).
data %>%
  count(publication_type) %>%
  arrange(publication_type, n) %>%
  top_n(5) %>%
  kable(col.names = c("Outlet", "#"))
## Selecting by n
| Outlet | # |
|---|---|
| B | 9 |
| C | 150 |
| J | 133 |
| M | 3 |
| W | 18 |